# encoding=utf-8
from __future__ import print_function

import pandas as pd
from scipy.interpolate import lagrange

# Source workbook to clean, and the path the cleaned copy is written to.
inputfile = '../dataSets/chapter4/data/catering_sale.xls'
outputfile = '../tmp/sales.xls'

data = pd.read_excel(inputfile)

# Blank out implausible values in the u'销量' (sales volume) column: anything
# below 400 or above 5000 becomes missing so it can be re-estimated later.
# Series.mask plus a whole-column assignment is used instead of the chained
# form data[col][mask] = None, which may write to a temporary copy and leave
# `data` unchanged.
data[u'销量'] = data[u'销量'].mask(
    (data[u'销量'] < 400) | (data[u'销量'] > 5000))

# print(data)


def ployinterp_column(s, n, k=5):
    """
    Estimate one missing value of a column by Lagrange interpolation.

    :param s: column vector (pandas Series); its index labels are used as the
        x coordinates, so it is assumed to carry integer position labels
    :param n: index label of the value to interpolate
    :param k: number of neighbours taken on each side of ``n``
    :return: value of the interpolating polynomial at ``n``, or NaN when the
        window contains no non-null neighbour
    """
    window = list(range(n - k, n)) + list(range(n + 1, n + 1 + k))
    # reindex() yields NaN for labels that fall outside the column (near the
    # first/last rows) instead of raising KeyError like s[window] does; those
    # entries are then dropped together with the genuinely missing ones.
    y = s.reindex(window)
    y = y[y.notnull()]

    if y.empty:
        # Nothing to interpolate from: keep the value missing.
        return float('nan')

    # lagrange() returns a numpy poly1d. It must be *called* to evaluate the
    # polynomial at n; indexing it with [n] returns the coefficient of x**n.
    return lagrange(y.index, list(y))(n)

# Fill every missing cell, column by column, with the interpolated estimate.
for col in data.columns:
    # Row labels that are missing in this column, computed once per column
    # rather than re-running isnull() for every row.
    for row in data.index[data[col].isnull()]:
        # data[col] is re-read on each call so that values filled earlier in
        # this pass are available as neighbours, as in a row-by-row fill.
        # .loc writes into `data` itself; the chained form data[col][row] = v
        # may assign to a temporary copy and silently lose the value.
        data.loc[row, col] = ployinterp_column(data[col], row)

# NOTE(review): the '.xls' target needs an Excel writer engine for the legacy
# format to be installed -- confirm for the pandas version in use.
data.to_excel(outputfile)

